1 Read log data

  • readr::read_log is more forgiving than data.table::fread
library(readr)
## 
## Attaching package: 'readr'
## The following objects are masked from 'package:scales':
## 
##     col_factor, col_numeric
rawlogs<-read_log("https://raw.githubusercontent.com/elastic/examples/master/ElasticStack_apache/apache_logs")
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_character(),
##   X5 = col_character(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_character(),
##   X9 = col_character()
## )
## Warning: 16 parsing failures.
##  row col   expected     actual
## 4031  -- 9 columns  5 columns 
## 4192  -- 9 columns  5 columns 
## 8897  X6 an integer U;        
## 8897  X7 an integer )         
## 8897  -- 9 columns  11 columns
## .... ... .......... ..........
## See problems(...) for more details.
library(data.table)
logs<-data.table(rawlogs)
knitr::kable(head(logs))
X1 X2 X3 X4 X5 X6 X7 X8 X9
83.149.9.216 NA NA 17/May/2015:10:05:03 +0000 GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1 200 203023 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:43 +0000 GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1 200 171717 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:47 +0000 GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1 200 26185 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:12 +0000 GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1 200 7697 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:07 +0000 GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1 200 2892 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:34 +0000 GET /presentations/logstash-monitorama-2013/images/sad-medic.png HTTP/1.1 200 430406 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36

1.1 Rename columns

Logs usually don’t have headers so you need to update the default column titles to something more expressive.

setnames(logs, colnames(logs)
         ,c( "ip", "identd", "uname", "time", "request", "status", "respsize", "referer", "agent"))
# http://stackoverflow.com/questions/9234699/understanding-apache-access-log
#  %h is the remote host (ie the client IP)
# %l is the identity of the user determined by identd (not usually # used since not reliable)
# %u is the user name determined by HTTP authentication
# %t is the time the request was received.
# %r is the request line from the client. ("GET / HTTP/1.0")
# %>s is the status code sent from the server to the client (200, # 404 etc.)
# %b is the size of the response to the client (in bytes)
# Referer is the page that linked to this URL.
# User-agent is the browser identification string.

knitr::kable(head(logs))
ip identd uname time request status respsize referer agent
83.149.9.216 NA NA 17/May/2015:10:05:03 +0000 GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1 200 203023 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:43 +0000 GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1 200 171717 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:47 +0000 GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1 200 26185 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:12 +0000 GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1 200 7697 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:07 +0000 GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1 200 2892 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
83.149.9.216 NA NA 17/May/2015:10:05:34 +0000 GET /presentations/logstash-monitorama-2013/images/sad-medic.png HTTP/1.1 200 430406 http://semicomplete.com/presentations/logstash-monitorama-2013/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36

2 Time handling

There is date handling capability out of the box with R, however, the lubridate package makes it easier to convert strings to dates, and perform manipulations.

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, mday, month, quarter, wday, week, yday, year
## The following object is masked from 'package:base':
## 
##     date
logs[,time:=dmy_hms(time)]
##                  ip identd uname                time
##    1:  83.149.9.216     NA    NA 2015-05-17 10:05:03
##    2:  83.149.9.216     NA    NA 2015-05-17 10:05:43
##    3:  83.149.9.216     NA    NA 2015-05-17 10:05:47
##    4:  83.149.9.216     NA    NA 2015-05-17 10:05:12
##    5:  83.149.9.216     NA    NA 2015-05-17 10:05:07
##   ---                                               
## 9992: 100.43.83.137     NA    NA 2015-05-20 21:05:01
## 9993:  63.140.98.80     NA    NA 2015-05-20 21:05:28
## 9994: 66.249.73.135     NA    NA 2015-05-20 21:05:00
## 9995:   180.76.6.56     NA    NA 2015-05-20 21:05:56
## 9996:  46.105.14.53     NA    NA 2015-05-20 21:05:15
##                                                                                  request
##    1:      GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
##    2:  GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
##    3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
##    4:        GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
##    5:         GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
##   ---                                                                                   
## 9992:                                                  GET /blog/tags/standards HTTP/1.1
## 9993:                                   \n63.140.98.80 - - [20/May/2015:21:05:50 +0000])
## 9994:                                                           GET /?flav=atom HTTP/1.1
## 9995:                                                           GET /robots.txt HTTP/1.1
## 9996:                                          GET /blog/tags/puppet?flav=rss20 HTTP/1.1
##       status respsize
##    1:    200   203023
##    2:    200   171717
##    3:    200    26185
##    4:    200     7697
##    5:    200     2892
##   ---                
## 9992:    200    13358
## 9993:     NA       NA
## 9994:    200    32352
## 9995:    200       NA
## 9996:    200    14872
##                                                               referer
##    1: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    2: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    3: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    4: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    5: http://semicomplete.com/presentations/logstash-monitorama-2013/
##   ---                                                                
## 9992:                                                              NA
## 9993:                                                              NA
## 9994:                                                              NA
## 9995:                                                              NA
## 9996:                                                              NA
##                                                                                                                         agent
##    1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##   ---                                                                                                                        
## 9992:                                                        Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
## 9993:                                                                                                                      NA
## 9994:                                                Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
## 9995:                                                     Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## 9996:                                                             UniversalFeedParser/4.2-pre-314-svn +http://feedparser.org/
logs[,`:=`(hour=hour(time), wday=wday(time)
           ,morning=am(time))]
##                  ip identd uname                time
##    1:  83.149.9.216     NA    NA 2015-05-17 10:05:03
##    2:  83.149.9.216     NA    NA 2015-05-17 10:05:43
##    3:  83.149.9.216     NA    NA 2015-05-17 10:05:47
##    4:  83.149.9.216     NA    NA 2015-05-17 10:05:12
##    5:  83.149.9.216     NA    NA 2015-05-17 10:05:07
##   ---                                               
## 9992: 100.43.83.137     NA    NA 2015-05-20 21:05:01
## 9993:  63.140.98.80     NA    NA 2015-05-20 21:05:28
## 9994: 66.249.73.135     NA    NA 2015-05-20 21:05:00
## 9995:   180.76.6.56     NA    NA 2015-05-20 21:05:56
## 9996:  46.105.14.53     NA    NA 2015-05-20 21:05:15
##                                                                                  request
##    1:      GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
##    2:  GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
##    3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
##    4:        GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
##    5:         GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
##   ---                                                                                   
## 9992:                                                  GET /blog/tags/standards HTTP/1.1
## 9993:                                   \n63.140.98.80 - - [20/May/2015:21:05:50 +0000])
## 9994:                                                           GET /?flav=atom HTTP/1.1
## 9995:                                                           GET /robots.txt HTTP/1.1
## 9996:                                          GET /blog/tags/puppet?flav=rss20 HTTP/1.1
##       status respsize
##    1:    200   203023
##    2:    200   171717
##    3:    200    26185
##    4:    200     7697
##    5:    200     2892
##   ---                
## 9992:    200    13358
## 9993:     NA       NA
## 9994:    200    32352
## 9995:    200       NA
## 9996:    200    14872
##                                                               referer
##    1: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    2: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    3: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    4: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    5: http://semicomplete.com/presentations/logstash-monitorama-2013/
##   ---                                                                
## 9992:                                                              NA
## 9993:                                                              NA
## 9994:                                                              NA
## 9995:                                                              NA
## 9996:                                                              NA
##                                                                                                                         agent
##    1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##   ---                                                                                                                        
## 9992:                                                        Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
## 9993:                                                                                                                      NA
## 9994:                                                Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
## 9995:                                                     Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## 9996:                                                             UniversalFeedParser/4.2-pre-314-svn +http://feedparser.org/
##       hour wday morning
##    1:   10    1    TRUE
##    2:   10    1    TRUE
##    3:   10    1    TRUE
##    4:   10    1    TRUE
##    5:   10    1    TRUE
##   ---                  
## 9992:   21    4   FALSE
## 9993:   21    4   FALSE
## 9994:   21    4   FALSE
## 9995:   21    4   FALSE
## 9996:   21    4   FALSE
logs[ , weekend:= wday %in% c(1,7)]
##                  ip identd uname                time
##    1:  83.149.9.216     NA    NA 2015-05-17 10:05:03
##    2:  83.149.9.216     NA    NA 2015-05-17 10:05:43
##    3:  83.149.9.216     NA    NA 2015-05-17 10:05:47
##    4:  83.149.9.216     NA    NA 2015-05-17 10:05:12
##    5:  83.149.9.216     NA    NA 2015-05-17 10:05:07
##   ---                                               
## 9992: 100.43.83.137     NA    NA 2015-05-20 21:05:01
## 9993:  63.140.98.80     NA    NA 2015-05-20 21:05:28
## 9994: 66.249.73.135     NA    NA 2015-05-20 21:05:00
## 9995:   180.76.6.56     NA    NA 2015-05-20 21:05:56
## 9996:  46.105.14.53     NA    NA 2015-05-20 21:05:15
##                                                                                  request
##    1:      GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
##    2:  GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
##    3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
##    4:        GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
##    5:         GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
##   ---                                                                                   
## 9992:                                                  GET /blog/tags/standards HTTP/1.1
## 9993:                                   \n63.140.98.80 - - [20/May/2015:21:05:50 +0000])
## 9994:                                                           GET /?flav=atom HTTP/1.1
## 9995:                                                           GET /robots.txt HTTP/1.1
## 9996:                                          GET /blog/tags/puppet?flav=rss20 HTTP/1.1
##       status respsize
##    1:    200   203023
##    2:    200   171717
##    3:    200    26185
##    4:    200     7697
##    5:    200     2892
##   ---                
## 9992:    200    13358
## 9993:     NA       NA
## 9994:    200    32352
## 9995:    200       NA
## 9996:    200    14872
##                                                               referer
##    1: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    2: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    3: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    4: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    5: http://semicomplete.com/presentations/logstash-monitorama-2013/
##   ---                                                                
## 9992:                                                              NA
## 9993:                                                              NA
## 9994:                                                              NA
## 9995:                                                              NA
## 9996:                                                              NA
##                                                                                                                         agent
##    1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##   ---                                                                                                                        
## 9992:                                                        Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
## 9993:                                                                                                                      NA
## 9994:                                                Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
## 9995:                                                     Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## 9996:                                                             UniversalFeedParser/4.2-pre-314-svn +http://feedparser.org/
##       hour wday morning weekend
##    1:   10    1    TRUE    TRUE
##    2:   10    1    TRUE    TRUE
##    3:   10    1    TRUE    TRUE
##    4:   10    1    TRUE    TRUE
##    5:   10    1    TRUE    TRUE
##   ---                          
## 9992:   21    4   FALSE   FALSE
## 9993:   21    4   FALSE   FALSE
## 9994:   21    4   FALSE   FALSE
## 9995:   21    4   FALSE   FALSE
## 9996:   21    4   FALSE   FALSE

3 Geolocation packages

There are few packages for resolving IPs: - rgeolocate - ggmap - iptools - ipapi (gh: hrbrmstr/ipapi)

Which one to use depends on API preferences, plus any additional requirements.

Play it smart - don’t call for every record, call for every unique record. Cache values where possible!

if(!require(ipapi)) devtools::install_github("hrbrmstr/ipapi")
## Loading required package: ipapi
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'ipapi'
## Using GitHub PAT from envvar GITHUB_PAT
## Downloading GitHub repo hrbrmstr/ipapi@master
## from URL https://api.github.com/repos/hrbrmstr/ipapi/zipball/master
## Installing ipapi
## Installing pbapply
## '/home/travis/R-bin/lib/R/bin/R' --no-site-file --no-environ --no-save  \
##   --no-restore --quiet CMD INSTALL  \
##   '/tmp/RtmpuEYtJl/devtoolsfac652faab0d/pbapply'  \
##   --library='/home/travis/R/Library' --install-tests
## 
## '/home/travis/R-bin/lib/R/bin/R' --no-site-file --no-environ --no-save  \
##   --no-restore --quiet CMD INSTALL  \
##   '/tmp/RtmpuEYtJl/devtoolsfac67470d5e8/hrbrmstr-ipapi-c612329'  \
##   --library='/home/travis/R/Library' --install-tests
## 
library(ipapi)
ips<-logs[,unique(ip)]

example<-TRUE
iptblloc<-"https://raw.githubusercontent.com/stephlocke/lazyCDN/master/sampleIPtbl.csv"

ip_tbl<-if(example) fread(iptblloc) 
## Warning in fread(iptblloc): Bumped column 13 to type character on data
## row 29, field contains 'EC4N'. Coercing previously read values in this
## column from logical, integer or numeric back to character which may not
## be lossless; e.g., if '00' and '000' occurred before they will now be just
## '0', and there may be inconsistencies with treatment of ',,' and ',NA,' too
## (if they occurred in this column before the bump). If this matters please
## rerun and set 'colClasses' to 'character' for this column. Please note
## that column type detection uses the first 5 rows, the middle 5 rows and the
## last 5 rows, so hopefully this message should be very rare. If reporting to
## datatable-help, please rerun and include the output from verbose=TRUE.
#ip_tbl<- ipapi::geolocate(ips)[, status:=NULL]

# Join IP results to log data
logs<-logs[ip_tbl, on=c(ip="query")]
head(logs)
##              ip identd uname                time
## 1: 83.149.9.216     NA    NA 2015-05-17 10:05:03
## 2: 83.149.9.216     NA    NA 2015-05-17 10:05:43
## 3: 83.149.9.216     NA    NA 2015-05-17 10:05:47
## 4: 83.149.9.216     NA    NA 2015-05-17 10:05:12
## 5: 83.149.9.216     NA    NA 2015-05-17 10:05:07
## 6: 83.149.9.216     NA    NA 2015-05-17 10:05:34
##                                                                               request
## 1:      GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2:  GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4:        GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5:         GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## 6:          GET /presentations/logstash-monitorama-2013/images/sad-medic.png HTTP/1.1
##    status respsize
## 1:    200   203023
## 2:    200   171717
## 3:    200    26185
## 4:    200     7697
## 5:    200     2892
## 6:    200   430406
##                                                            referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 6: http://semicomplete.com/presentations/logstash-monitorama-2013/
##                                                                                                                      agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 6: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    hour wday morning weekend                   as   city country
## 1:   10    1    TRUE    TRUE AS25159 PJSC MegaFon Moscow  Russia
## 2:   10    1    TRUE    TRUE AS25159 PJSC MegaFon Moscow  Russia
## 3:   10    1    TRUE    TRUE AS25159 PJSC MegaFon Moscow  Russia
## 4:   10    1    TRUE    TRUE AS25159 PJSC MegaFon Moscow  Russia
## 5:   10    1    TRUE    TRUE AS25159 PJSC MegaFon Moscow  Russia
## 6:   10    1    TRUE    TRUE AS25159 PJSC MegaFon Moscow  Russia
##    countryCode          isp     lat     lon          org region regionName
## 1:          RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon    MOW     Moscow
## 2:          RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon    MOW     Moscow
## 3:          RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon    MOW     Moscow
## 4:          RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon    MOW     Moscow
## 5:          RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon    MOW     Moscow
## 6:          RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon    MOW     Moscow
##         timezone    zip
## 1: Europe/Moscow 101194
## 2: Europe/Moscow 101194
## 3: Europe/Moscow 101194
## 4: Europe/Moscow 101194
## 5: Europe/Moscow 101194
## 6: Europe/Moscow 101194

4 URL handling

The format of the Apache request log means that the request component needs splitting up. The values are not always in quite the right format so you should always check for errors.

logs[,c("verb","url","scheme"):=tstrsplit(request," ")[1:3]]
##                 ip identd uname                time
##    1: 83.149.9.216     NA    NA 2015-05-17 10:05:03
##    2: 83.149.9.216     NA    NA 2015-05-17 10:05:43
##    3: 83.149.9.216     NA    NA 2015-05-17 10:05:47
##    4: 83.149.9.216     NA    NA 2015-05-17 10:05:12
##    5: 83.149.9.216     NA    NA 2015-05-17 10:05:07
##   ---                                              
## 9992: 38.99.236.50     NA    NA 2015-05-20 21:05:48
## 9993: 38.99.236.50     NA    NA 2015-05-20 21:05:42
## 9994: 38.99.236.50     NA    NA 2015-05-20 21:05:29
## 9995: 38.99.236.50     NA    NA 2015-05-20 21:05:31
## 9996:  180.76.6.56     NA    NA 2015-05-20 21:05:56
##                                                                                        request
##    1:            GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
##    2:        GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
##    3:       GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
##    4:              GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
##    5:               GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
##   ---                                                                                         
## 9992:       GET /presentations/logstash-puppetconf-2012/images/stats-negative-min.png HTTP/1.1
## 9993:                     GET /presentations/logstash-puppetconf-2012/images/logs.jpg HTTP/1.1
## 9994: GET /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png HTTP/1.1
## 9995:                                                                GET /favicon.ico HTTP/1.1
## 9996:                                                                 GET /robots.txt HTTP/1.1
##       status respsize
##    1:    200   203023
##    2:    200   171717
##    3:    200    26185
##    4:    200     7697
##    5:    200     2892
##   ---                
## 9992:    200    46139
## 9993:    200   663847
## 9994:    200    97173
## 9995:    200     3638
## 9996:    200       NA
##                                                               referer
##    1: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    2: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    3: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    4: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    5: http://semicomplete.com/presentations/logstash-monitorama-2013/
##   ---                                                                
## 9992: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9993: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9994: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9995:                                                              NA
## 9996:                                                              NA
##                                                                                                                         agent
##    1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##   ---                                                                                                                        
## 9992:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9993:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9994:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9995:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9996:                                                     Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
##       hour wday morning weekend
##    1:   10    1    TRUE    TRUE
##    2:   10    1    TRUE    TRUE
##    3:   10    1    TRUE    TRUE
##    4:   10    1    TRUE    TRUE
##    5:   10    1    TRUE    TRUE
##   ---                          
## 9992:   21    4   FALSE   FALSE
## 9993:   21    4   FALSE   FALSE
## 9994:   21    4   FALSE   FALSE
## 9995:   21    4   FALSE   FALSE
## 9996:   21    4   FALSE   FALSE
##                                                                  as
##    1:                                          AS25159 PJSC MegaFon
##    2:                                          AS25159 PJSC MegaFon
##    3:                                          AS25159 PJSC MegaFon
##    4:                                          AS25159 PJSC MegaFon
##    5:                                          AS25159 PJSC MegaFon
##   ---                                                              
## 9992:                                   AS174 Cogent Communications
## 9993:                                   AS174 Cogent Communications
## 9994:                                   AS174 Cogent Communications
## 9995:                                   AS174 Cogent Communications
## 9996: AS55967 Beijing Baidu Netcom Science and Technology Co., Ltd.
##          city       country countryCode
##    1:  Moscow        Russia          RU
##    2:  Moscow        Russia          RU
##    3:  Moscow        Russia          RU
##    4:  Moscow        Russia          RU
##    5:  Moscow        Russia          RU
##   ---                                  
## 9992:   Miami United States          US
## 9993:   Miami United States          US
## 9994:   Miami United States          US
## 9995:   Miami United States          US
## 9996: Beijing         China          CN
##                                                   isp     lat      lon
##    1:                                    PJSC MegaFon 55.7522  37.6156
##    2:                                    PJSC MegaFon 55.7522  37.6156
##    3:                                    PJSC MegaFon 55.7522  37.6156
##    4:                                    PJSC MegaFon 55.7522  37.6156
##    5:                                    PJSC MegaFon 55.7522  37.6156
##   ---                                                                 
## 9992:                           Cogent Communications 25.8530 -80.2348
## 9993:                           Cogent Communications 25.8530 -80.2348
## 9994:                           Cogent Communications 25.8530 -80.2348
## 9995:                           Cogent Communications 25.8530 -80.2348
## 9996: Beijing Baidu Netcom Science and Technology Co. 39.9289 116.3883
##                                                   org region regionName
##    1:                                    PJSC MegaFon    MOW     Moscow
##    2:                                    PJSC MegaFon    MOW     Moscow
##    3:                                    PJSC MegaFon    MOW     Moscow
##    4:                                    PJSC MegaFon    MOW     Moscow
##    5:                                    PJSC MegaFon    MOW     Moscow
##   ---                                                                  
## 9992:                           Cogent Communications     FL    Florida
## 9993:                           Cogent Communications     FL    Florida
## 9994:                           Cogent Communications     FL    Florida
## 9995:                           Cogent Communications     FL    Florida
## 9996: Beijing Baidu Netcom Science and Technology Co.     11    Beijing
##               timezone    zip verb
##    1:    Europe/Moscow 101194  GET
##    2:    Europe/Moscow 101194  GET
##    3:    Europe/Moscow 101194  GET
##    4:    Europe/Moscow 101194  GET
##    5:    Europe/Moscow 101194  GET
##   ---                             
## 9992: America/New_York  33147  GET
## 9993: America/New_York  33147  GET
## 9994: America/New_York  33147  GET
## 9995: America/New_York  33147  GET
## 9996:    Asia/Shanghai         GET
##                                                                               url
##    1:            /presentations/logstash-monitorama-2013/images/kibana-search.png
##    2:        /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
##    3:       /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
##    4:              /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
##    5:               /presentations/logstash-monitorama-2013/plugin/notes/notes.js
##   ---                                                                            
## 9992:       /presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9993:                     /presentations/logstash-puppetconf-2012/images/logs.jpg
## 9994: /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9995:                                                                /favicon.ico
## 9996:                                                                 /robots.txt
##         scheme
##    1: HTTP/1.1
##    2: HTTP/1.1
##    3: HTTP/1.1
##    4: HTTP/1.1
##    5: HTTP/1.1
##   ---         
## 9992: HTTP/1.1
## 9993: HTTP/1.1
## 9994: HTTP/1.1
## 9995: HTTP/1.1
## 9996: HTTP/1.1
# isolate issues!
issues<-logs[,!((verb %like% "^[A-Z]{3,}$")&
                 (scheme %like% "^HTTP"))]
errors<-logs[issues,]
logs<-logs[!issues, ]
library(urltools)
logs[,c("path","params"):=.(path(url),parameters(url))]
##                 ip identd uname                time
##    1: 83.149.9.216     NA    NA 2015-05-17 10:05:03
##    2: 83.149.9.216     NA    NA 2015-05-17 10:05:43
##    3: 83.149.9.216     NA    NA 2015-05-17 10:05:47
##    4: 83.149.9.216     NA    NA 2015-05-17 10:05:12
##    5: 83.149.9.216     NA    NA 2015-05-17 10:05:07
##   ---                                              
## 9982: 38.99.236.50     NA    NA 2015-05-20 21:05:48
## 9983: 38.99.236.50     NA    NA 2015-05-20 21:05:42
## 9984: 38.99.236.50     NA    NA 2015-05-20 21:05:29
## 9985: 38.99.236.50     NA    NA 2015-05-20 21:05:31
## 9986:  180.76.6.56     NA    NA 2015-05-20 21:05:56
##                                                                                        request
##    1:            GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
##    2:        GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
##    3:       GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
##    4:              GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
##    5:               GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
##   ---                                                                                         
## 9982:       GET /presentations/logstash-puppetconf-2012/images/stats-negative-min.png HTTP/1.1
## 9983:                     GET /presentations/logstash-puppetconf-2012/images/logs.jpg HTTP/1.1
## 9984: GET /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png HTTP/1.1
## 9985:                                                                GET /favicon.ico HTTP/1.1
## 9986:                                                                 GET /robots.txt HTTP/1.1
##       status respsize
##    1:    200   203023
##    2:    200   171717
##    3:    200    26185
##    4:    200     7697
##    5:    200     2892
##   ---                
## 9982:    200    46139
## 9983:    200   663847
## 9984:    200    97173
## 9985:    200     3638
## 9986:    200       NA
##                                                               referer
##    1: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    2: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    3: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    4: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    5: http://semicomplete.com/presentations/logstash-monitorama-2013/
##   ---                                                                
## 9982: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9983: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9984: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9985:                                                              NA
## 9986:                                                              NA
##                                                                                                                         agent
##    1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##   ---                                                                                                                        
## 9982:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9983:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9984:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9985:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9986:                                                     Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
##       hour wday morning weekend
##    1:   10    1    TRUE    TRUE
##    2:   10    1    TRUE    TRUE
##    3:   10    1    TRUE    TRUE
##    4:   10    1    TRUE    TRUE
##    5:   10    1    TRUE    TRUE
##   ---                          
## 9982:   21    4   FALSE   FALSE
## 9983:   21    4   FALSE   FALSE
## 9984:   21    4   FALSE   FALSE
## 9985:   21    4   FALSE   FALSE
## 9986:   21    4   FALSE   FALSE
##                                                                  as
##    1:                                          AS25159 PJSC MegaFon
##    2:                                          AS25159 PJSC MegaFon
##    3:                                          AS25159 PJSC MegaFon
##    4:                                          AS25159 PJSC MegaFon
##    5:                                          AS25159 PJSC MegaFon
##   ---                                                              
## 9982:                                   AS174 Cogent Communications
## 9983:                                   AS174 Cogent Communications
## 9984:                                   AS174 Cogent Communications
## 9985:                                   AS174 Cogent Communications
## 9986: AS55967 Beijing Baidu Netcom Science and Technology Co., Ltd.
##          city       country countryCode
##    1:  Moscow        Russia          RU
##    2:  Moscow        Russia          RU
##    3:  Moscow        Russia          RU
##    4:  Moscow        Russia          RU
##    5:  Moscow        Russia          RU
##   ---                                  
## 9982:   Miami United States          US
## 9983:   Miami United States          US
## 9984:   Miami United States          US
## 9985:   Miami United States          US
## 9986: Beijing         China          CN
##                                                   isp     lat      lon
##    1:                                    PJSC MegaFon 55.7522  37.6156
##    2:                                    PJSC MegaFon 55.7522  37.6156
##    3:                                    PJSC MegaFon 55.7522  37.6156
##    4:                                    PJSC MegaFon 55.7522  37.6156
##    5:                                    PJSC MegaFon 55.7522  37.6156
##   ---                                                                 
## 9982:                           Cogent Communications 25.8530 -80.2348
## 9983:                           Cogent Communications 25.8530 -80.2348
## 9984:                           Cogent Communications 25.8530 -80.2348
## 9985:                           Cogent Communications 25.8530 -80.2348
## 9986: Beijing Baidu Netcom Science and Technology Co. 39.9289 116.3883
##                                                   org region regionName
##    1:                                    PJSC MegaFon    MOW     Moscow
##    2:                                    PJSC MegaFon    MOW     Moscow
##    3:                                    PJSC MegaFon    MOW     Moscow
##    4:                                    PJSC MegaFon    MOW     Moscow
##    5:                                    PJSC MegaFon    MOW     Moscow
##   ---                                                                  
## 9982:                           Cogent Communications     FL    Florida
## 9983:                           Cogent Communications     FL    Florida
## 9984:                           Cogent Communications     FL    Florida
## 9985:                           Cogent Communications     FL    Florida
## 9986: Beijing Baidu Netcom Science and Technology Co.     11    Beijing
##               timezone    zip verb
##    1:    Europe/Moscow 101194  GET
##    2:    Europe/Moscow 101194  GET
##    3:    Europe/Moscow 101194  GET
##    4:    Europe/Moscow 101194  GET
##    5:    Europe/Moscow 101194  GET
##   ---                             
## 9982: America/New_York  33147  GET
## 9983: America/New_York  33147  GET
## 9984: America/New_York  33147  GET
## 9985: America/New_York  33147  GET
## 9986:    Asia/Shanghai         GET
##                                                                               url
##    1:            /presentations/logstash-monitorama-2013/images/kibana-search.png
##    2:        /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
##    3:       /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
##    4:              /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
##    5:               /presentations/logstash-monitorama-2013/plugin/notes/notes.js
##   ---                                                                            
## 9982:       /presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983:                     /presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985:                                                                /favicon.ico
## 9986:                                                                 /robots.txt
##         scheme
##    1: HTTP/1.1
##    2: HTTP/1.1
##    3: HTTP/1.1
##    4: HTTP/1.1
##    5: HTTP/1.1
##   ---         
## 9982: HTTP/1.1
## 9983: HTTP/1.1
## 9984: HTTP/1.1
## 9985: HTTP/1.1
## 9986: HTTP/1.1
##                                                                             path
##    1:            presentations/logstash-monitorama-2013/images/kibana-search.png
##    2:        presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
##    3:       presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
##    4:              presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
##    5:               presentations/logstash-monitorama-2013/plugin/notes/notes.js
##   ---                                                                           
## 9982:       presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983:                     presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985:                                                                favicon.ico
## 9986:                                                                 robots.txt
##       params
##    1:     NA
##    2:     NA
##    3:     NA
##    4:     NA
##    5:     NA
##   ---       
## 9982:     NA
## 9983:     NA
## 9984:     NA
## 9985:     NA
## 9986:     NA

5 Event steps

Often you need to worry about steps taken over time. The data.table package gives you an easy way to add IDs to rows or groups.

logs[order(time),`:=`(order=.SD[,.I], visit=.GRP), .(ip,agent)]
##                 ip identd uname                time
##    1: 83.149.9.216     NA    NA 2015-05-17 10:05:03
##    2: 83.149.9.216     NA    NA 2015-05-17 10:05:43
##    3: 83.149.9.216     NA    NA 2015-05-17 10:05:47
##    4: 83.149.9.216     NA    NA 2015-05-17 10:05:12
##    5: 83.149.9.216     NA    NA 2015-05-17 10:05:07
##   ---                                              
## 9982: 38.99.236.50     NA    NA 2015-05-20 21:05:48
## 9983: 38.99.236.50     NA    NA 2015-05-20 21:05:42
## 9984: 38.99.236.50     NA    NA 2015-05-20 21:05:29
## 9985: 38.99.236.50     NA    NA 2015-05-20 21:05:31
## 9986:  180.76.6.56     NA    NA 2015-05-20 21:05:56
##                                                                                        request
##    1:            GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
##    2:        GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
##    3:       GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
##    4:              GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
##    5:               GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
##   ---                                                                                         
## 9982:       GET /presentations/logstash-puppetconf-2012/images/stats-negative-min.png HTTP/1.1
## 9983:                     GET /presentations/logstash-puppetconf-2012/images/logs.jpg HTTP/1.1
## 9984: GET /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png HTTP/1.1
## 9985:                                                                GET /favicon.ico HTTP/1.1
## 9986:                                                                 GET /robots.txt HTTP/1.1
##       status respsize
##    1:    200   203023
##    2:    200   171717
##    3:    200    26185
##    4:    200     7697
##    5:    200     2892
##   ---                
## 9982:    200    46139
## 9983:    200   663847
## 9984:    200    97173
## 9985:    200     3638
## 9986:    200       NA
##                                                               referer
##    1: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    2: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    3: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    4: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    5: http://semicomplete.com/presentations/logstash-monitorama-2013/
##   ---                                                                
## 9982: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9983: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9984: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9985:                                                              NA
## 9986:                                                              NA
##                                                                                                                         agent
##    1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##   ---                                                                                                                        
## 9982:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9983:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9984:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9985:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9986:                                                     Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
##       hour wday morning weekend
##    1:   10    1    TRUE    TRUE
##    2:   10    1    TRUE    TRUE
##    3:   10    1    TRUE    TRUE
##    4:   10    1    TRUE    TRUE
##    5:   10    1    TRUE    TRUE
##   ---                          
## 9982:   21    4   FALSE   FALSE
## 9983:   21    4   FALSE   FALSE
## 9984:   21    4   FALSE   FALSE
## 9985:   21    4   FALSE   FALSE
## 9986:   21    4   FALSE   FALSE
##                                                                  as
##    1:                                          AS25159 PJSC MegaFon
##    2:                                          AS25159 PJSC MegaFon
##    3:                                          AS25159 PJSC MegaFon
##    4:                                          AS25159 PJSC MegaFon
##    5:                                          AS25159 PJSC MegaFon
##   ---                                                              
## 9982:                                   AS174 Cogent Communications
## 9983:                                   AS174 Cogent Communications
## 9984:                                   AS174 Cogent Communications
## 9985:                                   AS174 Cogent Communications
## 9986: AS55967 Beijing Baidu Netcom Science and Technology Co., Ltd.
##          city       country countryCode
##    1:  Moscow        Russia          RU
##    2:  Moscow        Russia          RU
##    3:  Moscow        Russia          RU
##    4:  Moscow        Russia          RU
##    5:  Moscow        Russia          RU
##   ---                                  
## 9982:   Miami United States          US
## 9983:   Miami United States          US
## 9984:   Miami United States          US
## 9985:   Miami United States          US
## 9986: Beijing         China          CN
##                                                   isp     lat      lon
##    1:                                    PJSC MegaFon 55.7522  37.6156
##    2:                                    PJSC MegaFon 55.7522  37.6156
##    3:                                    PJSC MegaFon 55.7522  37.6156
##    4:                                    PJSC MegaFon 55.7522  37.6156
##    5:                                    PJSC MegaFon 55.7522  37.6156
##   ---                                                                 
## 9982:                           Cogent Communications 25.8530 -80.2348
## 9983:                           Cogent Communications 25.8530 -80.2348
## 9984:                           Cogent Communications 25.8530 -80.2348
## 9985:                           Cogent Communications 25.8530 -80.2348
## 9986: Beijing Baidu Netcom Science and Technology Co. 39.9289 116.3883
##                                                   org region regionName
##    1:                                    PJSC MegaFon    MOW     Moscow
##    2:                                    PJSC MegaFon    MOW     Moscow
##    3:                                    PJSC MegaFon    MOW     Moscow
##    4:                                    PJSC MegaFon    MOW     Moscow
##    5:                                    PJSC MegaFon    MOW     Moscow
##   ---                                                                  
## 9982:                           Cogent Communications     FL    Florida
## 9983:                           Cogent Communications     FL    Florida
## 9984:                           Cogent Communications     FL    Florida
## 9985:                           Cogent Communications     FL    Florida
## 9986: Beijing Baidu Netcom Science and Technology Co.     11    Beijing
##               timezone    zip verb
##    1:    Europe/Moscow 101194  GET
##    2:    Europe/Moscow 101194  GET
##    3:    Europe/Moscow 101194  GET
##    4:    Europe/Moscow 101194  GET
##    5:    Europe/Moscow 101194  GET
##   ---                             
## 9982: America/New_York  33147  GET
## 9983: America/New_York  33147  GET
## 9984: America/New_York  33147  GET
## 9985: America/New_York  33147  GET
## 9986:    Asia/Shanghai         GET
##                                                                               url
##    1:            /presentations/logstash-monitorama-2013/images/kibana-search.png
##    2:        /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
##    3:       /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
##    4:              /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
##    5:               /presentations/logstash-monitorama-2013/plugin/notes/notes.js
##   ---                                                                            
## 9982:       /presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983:                     /presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985:                                                                /favicon.ico
## 9986:                                                                 /robots.txt
##         scheme
##    1: HTTP/1.1
##    2: HTTP/1.1
##    3: HTTP/1.1
##    4: HTTP/1.1
##    5: HTTP/1.1
##   ---         
## 9982: HTTP/1.1
## 9983: HTTP/1.1
## 9984: HTTP/1.1
## 9985: HTTP/1.1
## 9986: HTTP/1.1
##                                                                             path
##    1:            presentations/logstash-monitorama-2013/images/kibana-search.png
##    2:        presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
##    3:       presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
##    4:              presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
##    5:               presentations/logstash-monitorama-2013/plugin/notes/notes.js
##   ---                                                                           
## 9982:       presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983:                     presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985:                                                                favicon.ico
## 9986:                                                                 robots.txt
##       params order visit
##    1:     NA     2     1
##    2:     NA    14     1
##    3:     NA    16     1
##    4:     NA     5     1
##    5:     NA     3     1
##   ---                   
## 9982:     NA    24  1847
## 9983:     NA    23  1847
## 9984:     NA    14  1847
## 9985:     NA    15  1847
## 9986:     NA     1  1852

5.1 Most common landing pages

knitr::kable(logs[order==1,.N,path][
  order(-N)[1:10],])
path N
favicon.ico 221
NA 145
presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png 112
images/web/2009/banner.png 95
reset.css 91
style2.css 91
images/googledotcom.png 89
robots.txt 79
images/jordan-80.png 73
projects/xdotool/xdotool.xhtml 54

5.2 Most common exit pages

knitr::kable(logs[,.SD[which.max(order)],visit][
  ,.N,path][order(-N)[1:10],])
path N
favicon.ico 215
NA 149
presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png 111
images/googledotcom.png 94
images/jordan-80.png 90
images/web/2009/banner.png 90
style2.css 83
robots.txt 75
reset.css 63
projects/xdotool/xdotool.xhtml 63

5.3 Most common bounce pages

knitr::kable(logs[,.SD[which.max(order)],visit][
  order==1,.N,path][order(-N)[1:10],])
path N
presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png 105
images/googledotcom.png 89
favicon.ico 67
NA 62
robots.txt 43
presentations/logstash-scale11x/images/logstash.png 18
projects/xdotool/ 15
images/jordan-80.png 11
articles/dynamic-dns-with-dhcp/ 11
blog/geekery/ssl-latency.html 10

5.4 Most common error pages

logs[status>=500, .N, .(path,status)][order(-N)[1:pmin(10, .N)]]
##                  path status N
## 1: misc/Title.php.txt    500 2
## 2:  projects/xdotool/    500 1

5.5 Time since last request

logs[order(order), timesinceprevrequest:= time - shift(time) , visit]
##                 ip identd uname                time
##    1: 83.149.9.216     NA    NA 2015-05-17 10:05:03
##    2: 83.149.9.216     NA    NA 2015-05-17 10:05:43
##    3: 83.149.9.216     NA    NA 2015-05-17 10:05:47
##    4: 83.149.9.216     NA    NA 2015-05-17 10:05:12
##    5: 83.149.9.216     NA    NA 2015-05-17 10:05:07
##   ---                                              
## 9982: 38.99.236.50     NA    NA 2015-05-20 21:05:48
## 9983: 38.99.236.50     NA    NA 2015-05-20 21:05:42
## 9984: 38.99.236.50     NA    NA 2015-05-20 21:05:29
## 9985: 38.99.236.50     NA    NA 2015-05-20 21:05:31
## 9986:  180.76.6.56     NA    NA 2015-05-20 21:05:56
##                                                                                        request
##    1:            GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
##    2:        GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
##    3:       GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
##    4:              GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
##    5:               GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
##   ---                                                                                         
## 9982:       GET /presentations/logstash-puppetconf-2012/images/stats-negative-min.png HTTP/1.1
## 9983:                     GET /presentations/logstash-puppetconf-2012/images/logs.jpg HTTP/1.1
## 9984: GET /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png HTTP/1.1
## 9985:                                                                GET /favicon.ico HTTP/1.1
## 9986:                                                                 GET /robots.txt HTTP/1.1
##       status respsize
##    1:    200   203023
##    2:    200   171717
##    3:    200    26185
##    4:    200     7697
##    5:    200     2892
##   ---                
## 9982:    200    46139
## 9983:    200   663847
## 9984:    200    97173
## 9985:    200     3638
## 9986:    200       NA
##                                                               referer
##    1: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    2: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    3: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    4: http://semicomplete.com/presentations/logstash-monitorama-2013/
##    5: http://semicomplete.com/presentations/logstash-monitorama-2013/
##   ---                                                                
## 9982: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9983: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9984: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9985:                                                              NA
## 9986:                                                              NA
##                                                                                                                         agent
##    1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##    5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
##   ---                                                                                                                        
## 9982:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9983:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9984:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9985:           Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9986:                                                     Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
##       hour wday morning weekend
##    1:   10    1    TRUE    TRUE
##    2:   10    1    TRUE    TRUE
##    3:   10    1    TRUE    TRUE
##    4:   10    1    TRUE    TRUE
##    5:   10    1    TRUE    TRUE
##   ---                          
## 9982:   21    4   FALSE   FALSE
## 9983:   21    4   FALSE   FALSE
## 9984:   21    4   FALSE   FALSE
## 9985:   21    4   FALSE   FALSE
## 9986:   21    4   FALSE   FALSE
##                                                                  as
##    1:                                          AS25159 PJSC MegaFon
##    2:                                          AS25159 PJSC MegaFon
##    3:                                          AS25159 PJSC MegaFon
##    4:                                          AS25159 PJSC MegaFon
##    5:                                          AS25159 PJSC MegaFon
##   ---                                                              
## 9982:                                   AS174 Cogent Communications
## 9983:                                   AS174 Cogent Communications
## 9984:                                   AS174 Cogent Communications
## 9985:                                   AS174 Cogent Communications
## 9986: AS55967 Beijing Baidu Netcom Science and Technology Co., Ltd.
##          city       country countryCode
##    1:  Moscow        Russia          RU
##    2:  Moscow        Russia          RU
##    3:  Moscow        Russia          RU
##    4:  Moscow        Russia          RU
##    5:  Moscow        Russia          RU
##   ---                                  
## 9982:   Miami United States          US
## 9983:   Miami United States          US
## 9984:   Miami United States          US
## 9985:   Miami United States          US
## 9986: Beijing         China          CN
##                                                   isp     lat      lon
##    1:                                    PJSC MegaFon 55.7522  37.6156
##    2:                                    PJSC MegaFon 55.7522  37.6156
##    3:                                    PJSC MegaFon 55.7522  37.6156
##    4:                                    PJSC MegaFon 55.7522  37.6156
##    5:                                    PJSC MegaFon 55.7522  37.6156
##   ---                                                                 
## 9982:                           Cogent Communications 25.8530 -80.2348
## 9983:                           Cogent Communications 25.8530 -80.2348
## 9984:                           Cogent Communications 25.8530 -80.2348
## 9985:                           Cogent Communications 25.8530 -80.2348
## 9986: Beijing Baidu Netcom Science and Technology Co. 39.9289 116.3883
##                                                   org region regionName
##    1:                                    PJSC MegaFon    MOW     Moscow
##    2:                                    PJSC MegaFon    MOW     Moscow
##    3:                                    PJSC MegaFon    MOW     Moscow
##    4:                                    PJSC MegaFon    MOW     Moscow
##    5:                                    PJSC MegaFon    MOW     Moscow
##   ---                                                                  
## 9982:                           Cogent Communications     FL    Florida
## 9983:                           Cogent Communications     FL    Florida
## 9984:                           Cogent Communications     FL    Florida
## 9985:                           Cogent Communications     FL    Florida
## 9986: Beijing Baidu Netcom Science and Technology Co.     11    Beijing
##               timezone    zip verb
##    1:    Europe/Moscow 101194  GET
##    2:    Europe/Moscow 101194  GET
##    3:    Europe/Moscow 101194  GET
##    4:    Europe/Moscow 101194  GET
##    5:    Europe/Moscow 101194  GET
##   ---                             
## 9982: America/New_York  33147  GET
## 9983: America/New_York  33147  GET
## 9984: America/New_York  33147  GET
## 9985: America/New_York  33147  GET
## 9986:    Asia/Shanghai         GET
##                                                                               url
##    1:            /presentations/logstash-monitorama-2013/images/kibana-search.png
##    2:        /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
##    3:       /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
##    4:              /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
##    5:               /presentations/logstash-monitorama-2013/plugin/notes/notes.js
##   ---                                                                            
## 9982:       /presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983:                     /presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985:                                                                /favicon.ico
## 9986:                                                                 /robots.txt
##         scheme
##    1: HTTP/1.1
##    2: HTTP/1.1
##    3: HTTP/1.1
##    4: HTTP/1.1
##    5: HTTP/1.1
##   ---         
## 9982: HTTP/1.1
## 9983: HTTP/1.1
## 9984: HTTP/1.1
## 9985: HTTP/1.1
## 9986: HTTP/1.1
##                                                                             path
##    1:            presentations/logstash-monitorama-2013/images/kibana-search.png
##    2:        presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
##    3:       presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
##    4:              presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
##    5:               presentations/logstash-monitorama-2013/plugin/notes/notes.js
##   ---                                                                           
## 9982:       presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983:                     presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985:                                                                favicon.ico
## 9986:                                                                 robots.txt
##       params order visit timesinceprevrequest
##    1:     NA     2     1               3 secs
##    2:     NA    14     1               9 secs
##    3:     NA    16     1               1 secs
##    4:     NA     5     1               1 secs
##    5:     NA     3     1               4 secs
##   ---                                        
## 9982:     NA    24  1847               6 secs
## 9983:     NA    23  1847               1 secs
## 9984:     NA    14  1847               2 secs
## 9985:     NA    15  1847               2 secs
## 9986:     NA     1  1852              NA secs
logs[visit==1, .(order, time, timesinceprevrequest )]
##     order                time timesinceprevrequest
##  1:     2 2015-05-17 10:05:03               3 secs
##  2:    14 2015-05-17 10:05:43               9 secs
##  3:    16 2015-05-17 10:05:47               1 secs
##  4:     5 2015-05-17 10:05:12               1 secs
##  5:     3 2015-05-17 10:05:07               4 secs
##  6:    13 2015-05-17 10:05:34               1 secs
##  7:    22 2015-05-17 10:05:57               1 secs
##  8:    17 2015-05-17 10:05:50               3 secs
##  9:     7 2015-05-17 10:05:24               5 secs
## 10:    18 2015-05-17 10:05:50               0 secs
## 11:    15 2015-05-17 10:05:46               3 secs
## 12:     4 2015-05-17 10:05:11               4 secs
## 13:     6 2015-05-17 10:05:19               7 secs
## 14:    11 2015-05-17 10:05:33               3 secs
## 15:     1 2015-05-17 10:05:00              NA secs
## 16:     9 2015-05-17 10:05:25               1 secs
## 17:    23 2015-05-17 10:05:59               2 secs
## 18:    10 2015-05-17 10:05:30               5 secs
## 19:    19 2015-05-17 10:05:53               3 secs
## 20:     8 2015-05-17 10:05:24               0 secs
## 21:    20 2015-05-17 10:05:54               1 secs
## 22:    12 2015-05-17 10:05:33               0 secs
## 23:    21 2015-05-17 10:05:56               2 secs
##     order                time timesinceprevrequest

6 Visualising

6.1 Mapping

library(ggmap)
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
## The following object is masked from 'package:magrittr':
## 
##     inset
ggplot(map_data('world')) +
  geom_polygon(aes(x = long, y = lat, group = group), fill = 'grey90', colour = 'white') + 
geom_point(aes(x = lon, y = lat, size = N), color = '#2165B6',
           data = logs[, .N, .(lon, lat)]) +
  xlab('') + ylab('') + 
  theme_minimal() + theme('legend.position' = 'top')

6.2 Heatmap

library(ggplot2)

heatmap<-function(ggplot,size=20){
  ggplot+ coord_equal()+
    geom_tile(color="white", size=0.1)+
    labs(x=NULL, y=NULL, title=NULL)+
    scale_x_continuous(breaks=seq(0,24,6))+
    scale_fill_gradient()
}

ip_activity<-logs[,.N,.(country,hour)]
ga<-ggplot(ip_activity[country %like% "^A"], aes(x=hour, y=country, fill=N))
heatmap(ga)

6.3 Flow Diagram

library(DiagrammeR)

URLids<-logs[,.N,.(labels_col=path)][,nodes:=.I][N>50]
activity<-URLids[logs, on=c(labels_col="path")][
  !is.na(nodes),.(visit, order, nodes)]

# Get a cross join of activity
moves<-activity[activity, on=c("visit"), allow.cartesian=TRUE][
  # Filter to only include next site
  order==i.order-1][ 
    # Get nodes and position
    ,.(tooltip=.N),.(from=nodes,to=i.nodes)][,penwidth:=10*tooltip/max(tooltip)]

gr<-create_graph(setDF(URLids), setDF(moves))

render_graph(gr)

7 Time series

top10<-logs[,.N,country][order(-N)[1:10],country]
tz_ts<-logs[country %in% top10,.N,.(country, xts::align.time(time,n=60*5))]
ggplot(tz_ts, aes(x=xts, y=N, group=1))+
  geom_line()+
  geom_smooth()+
  facet_wrap(~country, scales="free_y")
## `geom_smooth()` using method = 'loess'

library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following object is masked from 'package:data.table':
## 
##     last
## The following objects are masked from 'package:dplyr':
## 
##     first, last
ts<-logs[order(time),.N,.(time=xts::align.time(time,n=60))]
xts_df<-xts(ts$N,ts$time)
plot(xts_df)

devtools::install_github("twitter/AnomalyDetection")
## Using GitHub PAT from envvar GITHUB_PAT
## Downloading GitHub repo twitter/AnomalyDetection@master
## from URL https://api.github.com/repos/twitter/AnomalyDetection/zipball/master
## Installing AnomalyDetection
## '/home/travis/R-bin/lib/R/bin/R' --no-site-file --no-environ --no-save  \
##   --no-restore --quiet CMD INSTALL  \
##   '/tmp/RtmpuEYtJl/devtoolsfac62c8a34a5/twitter-AnomalyDetection-1f5deaa'  \
##   --library='/home/travis/R/Library' --install-tests
## 
library(AnomalyDetection)
AnomalyDetectionTs(setDF(logs[,.N,align.time(time)]), max_anoms=0.05, direction='both',plot=TRUE)
## $anoms
##             timestamp anoms
## 1 2015-05-17 10:06:00    74
## 2 2015-05-18 10:06:00   132
## 3 2015-05-19 14:06:00   134
## 4 2015-05-20 21:06:00    78
## 
## $plot